{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-24T14:39:35.909604Z",
     "start_time": "2021-11-24T14:39:35.893604Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "120"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "824464"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def gen():\n",
    "    for x in range(100000):\n",
    "        yield x\n",
    "        \n",
    "# 生成器\n",
    "my_gen = gen()\n",
    "# 列表\n",
    "my_list = [x for x in range(100000)]\n",
    "\n",
    "import sys\n",
    "\n",
    "sys.getsizeof(my_gen)\n",
    "\n",
    "sys.getsizeof(my_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-24T14:39:35.953604Z",
     "start_time": "2021-11-24T14:39:35.912604Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "87624"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "120"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 列表推导式\n",
    "my_list = [x for x in range(10000)]\n",
    "\n",
    "# 生成器表达式\n",
    "my_gen  = (x for x in range(10000))\n",
    "\n",
    "sys.getsizeof(my_list)\n",
    "\n",
    "sys.getsizeof(my_gen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-24T14:40:27.951604Z",
     "start_time": "2021-11-24T14:39:35.956604Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('./中兴_2019-09忙时.csv',encoding = 'gbk',engine = 'python')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-24T14:45:44.113604Z",
     "start_time": "2021-11-24T14:45:28.897604Z"
    },
    "code_folding": []
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Invalid file path or buffer object type: <class 'pandas.core.frame.DataFrame'>",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-9-f04247452bc2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdfli\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'./out.csv'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'w'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mnewline\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m     \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m \u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal)\u001b[0m\n\u001b[0;32m   3199\u001b[0m             \u001b[0mdoublequote\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdoublequote\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3200\u001b[0m             \u001b[0mescapechar\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mescapechar\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3201\u001b[1;33m             \u001b[0mdecimal\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdecimal\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3202\u001b[0m         )\n\u001b[0;32m   3203\u001b[0m         \u001b[0mformatter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\io\\formats\\csvs.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, obj, path_or_buf, sep, na_rep, float_format, cols, header, index, index_label, mode, encoding, compression, quoting, line_terminator, chunksize, quotechar, date_format, doublequote, escapechar, decimal)\u001b[0m\n\u001b[0;32m     64\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     65\u001b[0m         self.path_or_buf, _, _, _ = get_filepath_or_buffer(\n\u001b[1;32m---> 66\u001b[1;33m             \u001b[0mpath_or_buf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcompression\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcompression\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     67\u001b[0m         )\n\u001b[0;32m     68\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msep\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_filepath_or_buffer\u001b[1;34m(filepath_or_buffer, encoding, compression, mode)\u001b[0m\n\u001b[0;32m    198\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_file_like\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    199\u001b[0m         \u001b[0mmsg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34mf\"Invalid file path or buffer object type: {type(filepath_or_buffer)}\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 200\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    202\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcompression\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: Invalid file path or buffer object type: <class 'pandas.core.frame.DataFrame'>"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "# def read_csv_partly(file):\n",
    "#     file_data = pd.read_csv(file, engine='python', encoding='gbk', chunksize = 50000)\n",
    "#     for df_tmp in file_data:\n",
    "#         yield df_tmp\n",
    "\n",
    "li = (df_tmp for df_tmp in pd.read_csv('./中兴_2019-09忙时.csv', engine='python', encoding='gbk', chunksize = 50000))\n",
    "\n",
    "dfli = []\n",
    "for i,df in enumerate(li):\n",
    "    if i == 6:\n",
    "        break\n",
    "    dfli.append(df)\n",
    "res = pd.concat(dfli,axis = 0)\n",
    "with open('./out.csv','w',newline='') as f:\n",
    "    res.to_csv(f, index =False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
